/*
Copyright 2008-2009 Elöd Egyed-Zsigmond, Cyril Laitang
Copyright 2009-2011 Samuel Gesche

This file is part of IPRI News Analyzer.

IPRI News Analyzer is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

IPRI News Analyzer is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with IPRI News Analyzer.  If not, see <http://www.gnu.org/licenses/>.
*/

package data.base.connectors;

import data.base.Database;
import data.base.NoBaseException;

import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;

/**
 * Client for a remote SPARQL endpoint (configured here as the public DBpedia
 * endpoint) used to look up resources by French label and to walk the
 * {@code rdf:type} / {@code rdfs:subClassOf} links of the concepts found.
 *
 * <p>The list of "higher level" and "lower level" ontology classes is read
 * from the local {@link Database} through {@code OntologieDatabase}.
 * Progress traces are written to {@code System.out}.
 */
public class SemanticDatabase {
    /** Above this number of distinct categories a word is treated as a stop word. */
    private static final int MAX_CATEGORIES = 100;

    private final String remoteService;
    private final String foaf;
    private final String rdfs;
    private final String rdf;
    private final Database myDB;

    /**
     * @param db local database handed to {@code OntologieDatabase} and
     *           {@code CacheDatabase}
     */
    public SemanticDatabase(Database db){
        myDB = db;
        remoteService = "http://dbpedia.org/sparql";
        // Namespace prefixes prepended to every query.
        rdfs = "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>";
        foaf = "PREFIX foaf: <http://xmlns.com/foaf/0.1/>";
        rdf = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>";
    }

    /**
     * Runs an arbitrary SELECT query (the standard prefixes are prepended) and
     * returns the concatenation of the {@code toString()} of every solution.
     *
     * @param query SPARQL SELECT query without prefix declarations
     * @return the concatenated solutions, or a message starting with
     *         {@code "Echec de l'interrogation : "} if anything failed
     */
    public String sendQuery(String query){
        QueryExecution execution = null;
        try {
            Query parsed = QueryFactory.create(prologue() + " " + query);
            execution = QueryExecutionFactory.sparqlService(remoteService, parsed);
            ResultSet rows = execution.execSelect();
            StringBuilder text = new StringBuilder();
            while (rows.hasNext()) {
                text.append(rows.next().toString());
            }
            return text.toString();
        } catch (Exception e) {
            return "Echec de l'interrogation : " + e.getMessage();
        } finally {
            // Release the HTTP resources even when the query failed half-way.
            if (execution != null) {
                execution.close();
            }
        }
    }

    /**
     * Lists the resources of a given type whose French label matches a pattern
     * (case-insensitive regex, at most 40 results).
     *
     * @param unknown   regex matched against the French {@code rdfs:label}
     * @param ressource URI of the {@code rdf:type} the resources must have
     * @return raw {@code ArrayList} of {@code String} URIs; on failure the error
     *         is printed and the results gathered so far are returned
     */
    public ArrayList getCandidatesURI(String unknown, String ressource){
        ArrayList<String> candidates = new ArrayList<String>();
        try {
            selectInto(labelMatchQuery(ressource, unknown), "x", candidates);
        } catch (Exception e) {
            System.out.println("echec " + e.getMessage());
        }
        return candidates;
    }

    /**
     * Tells whether at least one resource of type {@code categorie} has a French
     * label matching {@code concept} (case-insensitive regex).
     *
     * @param categorie URI of the {@code rdf:type} to test
     * @param concept   regex matched against the French {@code rdfs:label}
     * @return {@code true} if the endpoint returned at least one solution
     */
    public boolean isCategorieDe(String categorie, String concept){
        String sparql = labelMatchQuery(categorie, concept);
        System.out.println(sparql);
        Query parsed = QueryFactory.create(sparql);
        QueryExecution execution = QueryExecutionFactory.sparqlService(remoteService, parsed);
        try {
            boolean found = execution.execSelect().hasNext();
            System.out.println(categorie + " est-il une catégorie de " + concept + " ? " + found);
            return found;
        } finally {
            execution.close();
        }
    }

    /**
     * Collects every {@code rdf:type} of the resources that belong to one of the
     * higher-level ontology classes and whose French label matches
     * {@code concept}. Types whose URI contains {@code class/yago} are dropped.
     *
     * @param concept regex matched against the French {@code rdfs:label}
     * @return the distinct type URIs (anything from the first {@code '@'} on is
     *         cut off), in no particular order
     * @throws NoBaseException if the local database is not available
     */
    public String[] getGeneralisation(String concept) throws NoBaseException {
        // The cache lookup/store through CacheDatabase is currently disabled.
        // NOTE(review): the instance is unused; the construction is kept only in
        // case its constructor has side effects - confirm and remove.
        new CacheDatabase(myDB);
        String[] ontos = getHigherLevels();
        System.out.println("!!Traitement du concept : " + concept);
        System.out.println("*Concept absent de la base");

        String pattern = literalBody(concept);
        Set<String> categories = new HashSet<String>();
        for (int i = 0; i < ontos.length; i++) {
            String sparql = prologue() +
                    " SELECT DISTINCT ?cat WHERE {" +
                    " ?x rdfs:label ?label. FILTER (lang(?label) = 'fr')" +
                    " ?x rdf:type <" + ontos[i] + ">. FILTER (regex(?label, '" + pattern + "', 'i'))" +
                    " ?x rdf:type ?cat." +
                    " } ";
            categories.addAll(selectDistinctTrimmed(sparql, "cat"));
        }
        System.out.println("*" + concept + " : " + categories.size() + " catégories.");
        System.out.println("*Suppression des catégories Yago (résultat pertinent peu probable)");
        for (Iterator<String> it = categories.iterator(); it.hasNext();) {
            if (it.next().indexOf("class/yago") > -1) {
                it.remove();
            }
        }
        System.out.println("*" + concept + " : " + categories.size() + " catégories conservées.");
        return stripLanguageTags(categories);
    }

    /**
     * Returns every class reachable from {@code concept} by following
     * {@code rdfs:subClassOf} any number of times, {@code concept} itself
     * excluded. Traces are printed to {@code System.out}.
     *
     * <p>As before, nothing is queried (and an empty array is returned) when no
     * higher-level ontology is configured.
     *
     * @param concept URI of the class to start from
     * @return the distinct ancestor URIs, in no particular order
     * @throws NoBaseException if the local database is not available
     */
    public String[] getSubGeneralisation(String concept) throws NoBaseException {
        // NOTE(review): unused instance, see getGeneralisation.
        new CacheDatabase(myDB);
        String[] ontos = getHigherLevels();
        System.out.println("##Traitement du concept : " + concept);
        Set<String> ancestors = ontos.length > 0
                ? collectSuperClasses(concept)
                : new HashSet<String>();
        System.out.println("***" + concept + " : " + ancestors.size() + " catégories conservées.");
        return stripLanguageTags(ancestors);
    }

    /**
     * Same as {@link #getSubGeneralisation(String)} but without console traces.
     *
     * @param concept URI of the class to start from
     * @return the distinct ancestor URIs, in no particular order
     * @throws NoBaseException if the local database is not available
     */
    public String[] getSubSubGeneralisation(String concept) throws NoBaseException {
        // NOTE(review): unused instance, see getGeneralisation.
        new CacheDatabase(myDB);
        String[] ontos = getHigherLevels();
        Set<String> ancestors = ontos.length > 0
                ? collectSuperClasses(concept)
                : new HashSet<String>();
        return stripLanguageTags(ancestors);
    }

    /**
     * Computes the categories of {@code concept} and removes from that list
     * every category that is an ancestor (via {@code rdfs:subClassOf}) of
     * another category of the list, so only the most specific ones remain.
     * If {@value #MAX_CATEGORIES} or more distinct categories are found the
     * word is considered empty and no category is returned.
     *
     * @param concept regex matched against the French {@code rdfs:label}
     * @param niveaux currently ignored (behaves as if it were 1)
     * @return the last path segment of each retained category URI
     * @throws NoBaseException if the local database is not available
     */
    public String[] getGeneralisationStricte(String concept, int niveaux) throws NoBaseException {
        String[] premierResultat = getGeneralisation(concept);
        System.out.println("*" + premierResultat.length + " catégories trouvées en premier examen.");
        System.out.print("*");
        Set<String> aVerifier = new HashSet<String>();
        for (int i = 0; i < premierResultat.length; i++) {
            System.out.print(lastPathSegment(premierResultat[i]) + " ; ");
            aVerifier.add(premierResultat[i]);
        }
        System.out.println("");
        System.out.println("*" + aVerifier.size() + " catégories uniques.");

        if (aVerifier.size() >= MAX_CATEGORIES) {
            System.out.println("*Trop de catégories. Mot vide.");
            aVerifier.clear();
        } else {
            // Iterate over a snapshot: the set itself shrinks while we go.
            ArrayList<String> snapshot = new ArrayList<String>(aVerifier);
            for (int i = 0; i < snapshot.size(); i++) {
                String candidate = snapshot.get(i);
                if (!aVerifier.contains(candidate)) {
                    continue; // already eliminated as an ancestor of another one
                }
                String[] gene = getSubGeneralisation(candidate);
                boolean removed = false;
                for (int j = 0; j < gene.length; j++) {
                    if (aVerifier.contains(gene[j])) {
                        if (!removed) {
                            System.out.print("***Nettoyage de la liste des catégories : suppression de ");
                            removed = true;
                        }
                        aVerifier.remove(gene[j]);
                        System.out.print(gene[j] + " ; ");
                    }
                }
                if (removed) {
                    System.out.println("");
                }
            }
        }

        String[] resultat = new String[aVerifier.size()];
        aVerifier.toArray(resultat);
        System.out.println("*" + resultat.length + " catégories retenues.");
        System.out.print("*");
        for (int i = 0; i < resultat.length; i++) {
            // Drop the URI, keep only the concept name.
            resultat[i] = lastPathSegment(resultat[i]);
            System.out.print(resultat[i] + " ; ");
        }
        System.out.println("");
        return resultat;
    }

    /**
     * Resolves the higher-level ontology nodes stored in the local database.
     *
     * @return one resource string per node returned by
     *         {@code OntologieDatabase.getHigherNodes()}, in the same order
     * @throws NoBaseException if the local database is not available
     */
    public String[] getHigherLevels() throws NoBaseException {
        OntologieDatabase odb = new OntologieDatabase(myDB);
        ArrayList ontologiesIDs = odb.getHigherNodes();
        String[] result = new String[ontologiesIDs.size()];
        for (int i = 0; i < result.length; i++) {
            int id = Integer.parseInt(ontologiesIDs.get(i).toString());
            result[i] = odb.getRessource(id);
        }
        return result;
    }

    /**
     * Resolves the lower-level ontology nodes stored in the local database.
     *
     * @return raw {@code ArrayList} holding one resource string per node
     *         returned by {@code OntologieDatabase.getLowerNodes()}
     * @throws NoBaseException if the local database is not available
     */
    public ArrayList getlowerLevels() throws NoBaseException {
        OntologieDatabase odb = new OntologieDatabase(myDB);
        ArrayList ontologiesIDs = odb.getLowerNodes();
        ArrayList<String> ontologies = new ArrayList<String>();
        for (int i = 0; i < ontologiesIDs.size(); i++) {
            int id = Integer.parseInt(ontologiesIDs.get(i).toString());
            ontologies.add(odb.getRessource(id));
        }
        return ontologies;
    }

    /** Prefix declarations shared by every query. */
    private String prologue() {
        return foaf + " " + rdfs + " " + rdf;
    }

    /** Builds the "resources of a type whose French label matches" query. */
    private String labelMatchQuery(String typeUri, String labelPattern) {
        return prologue() +
                " SELECT * WHERE { ?x rdfs:label ?label." +
                " FILTER (lang(?label) = 'fr') " +
                " ?x rdf:type <" + typeUri + ">." +
                " FILTER (regex(?label, '" + literalBody(labelPattern) + "', 'i'))" +
                " } LIMIT 40";
    }

    /**
     * Makes a text safe to place between single quotes in a SPARQL query:
     * bare apostrophes, line breaks and a dangling final backslash are escaped.
     * Existing backslash pairs are copied verbatim so that text which already
     * formed a valid literal body produces exactly the same query as before.
     */
    private static String literalBody(String raw) {
        String text = String.valueOf(raw);
        int n = text.length();
        StringBuilder out = new StringBuilder(n + 8);
        for (int i = 0; i < n; i++) {
            char c = text.charAt(i);
            if (c == '\\') {
                if (i + 1 < n) {
                    out.append(c).append(text.charAt(++i));
                } else {
                    // A lone trailing backslash would escape the closing quote.
                    out.append("\\\\");
                }
            } else if (c == '\'') {
                out.append("\\'");
            } else if (c == '\n') {
                out.append("\\n");
            } else if (c == '\r') {
                out.append("\\r");
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Runs a SELECT query on the remote endpoint and appends the string form of
     * {@code variable} of every solution to {@code sink}. Solutions already
     * appended stay in {@code sink} if the iteration fails.
     */
    private void selectInto(String sparql, String variable, ArrayList<String> sink) {
        Query parsed = QueryFactory.create(sparql);
        QueryExecution execution = QueryExecutionFactory.sparqlService(remoteService, parsed);
        try {
            ResultSet rows = execution.execSelect();
            while (rows.hasNext()) {
                QuerySolution row = (QuerySolution) rows.next();
                sink.add(row.get(variable).toString());
            }
        } finally {
            execution.close();
        }
    }

    /** Runs a SELECT query and returns the distinct, trimmed values of {@code variable}. */
    private Set<String> selectDistinctTrimmed(String sparql, String variable) {
        ArrayList<String> raw = new ArrayList<String>();
        selectInto(sparql, variable, raw);
        Set<String> distinct = new HashSet<String>();
        for (int i = 0; i < raw.size(); i++) {
            distinct.add(raw.get(i).trim());
        }
        return distinct;
    }

    /**
     * Transitive closure of {@code rdfs:subClassOf} starting at {@code concept},
     * {@code concept} excluded. Every class is queried at most once, so cycles
     * in the hierarchy (including a class declared subclass of itself) end.
     */
    private Set<String> collectSuperClasses(String concept) {
        Set<String> ancestors = new HashSet<String>();
        Set<String> scheduled = new HashSet<String>();
        ArrayList<String> worklist = new ArrayList<String>();
        scheduled.add(concept);
        worklist.add(concept);
        // The list grows while it is walked: index loop on purpose.
        for (int i = 0; i < worklist.size(); i++) {
            String sparql = prologue() +
                    " SELECT DISTINCT ?cat WHERE {" +
                    " <" + worklist.get(i) + "> rdfs:subClassOf ?cat. " +
                    " } ";
            Iterator<String> parents = selectDistinctTrimmed(sparql, "cat").iterator();
            while (parents.hasNext()) {
                String parent = parents.next();
                ancestors.add(parent);
                if (scheduled.add(parent)) {
                    worklist.add(parent);
                }
            }
        }
        // A concept is sometimes reported as a category of itself.
        ancestors.remove(concept);
        return ancestors;
    }

    /** Copies the values to an array, cutting each one at its first {@code '@'}. */
    private static String[] stripLanguageTags(Set<String> values) {
        String[] out = new String[values.size()];
        int i = 0;
        for (Iterator<String> it = values.iterator(); it.hasNext();) {
            String value = it.next();
            int at = value.indexOf('@');
            out[i++] = at < 0 ? value : value.substring(0, at);
        }
        return out;
    }

    /**
     * Returns the last non-empty-trailing {@code '/'}-separated segment of a
     * URL, to display the concept name instead of the whole URL. A URL made
     * only of slashes yields the empty string.
     */
    private String lastPathSegment(String url){
        String[] parts = url.split("\\/");
        return parts.length == 0 ? "" : parts[parts.length - 1];
    }
}
